Setup
Loading Libraries
library(tidyverse)
library(plotly)
library(sf)
library(tigris)
library(leaflet)
library(censusapi)
Sys.setenv(CENSUS_KEY="c8aa67e4086b4b5ce3a8717f59faa9a28f611dab")
Loading and Processing Data
#configuring the data
bay_county_names <-
c(
"Alameda",
"Contra Costa",
"Marin",
"Napa",
"San Francisco",
"San Mateo",
"Santa Clara",
"Solano",
"Sonoma"
)
bay_counties <-
counties("CA", cb = T, progress_bar = F) %>%
filter(NAME %in% bay_county_names)
usa_zips <-
zctas(cb = T, progress_bar = F)
bay_zips <-
usa_zips %>%
st_centroid() %>%
.[bay_counties, ] %>%
st_set_geometry(NULL) %>%
left_join(usa_zips %>% select(GEOID10)) %>%
st_as_sf()
#Filtering and processing electric and gas data
years <- 2017:2020
quarters <- 1:4
types <- list("Electric", "Gas")
pge <- NULL
for(year in years) {
for (quarter in quarters) {
if(year == 2020){
if(quarter == 4){
next
}
}
for (type in types) {
filename <- paste0("PGE_", year, "_Q", quarter, "_", type, "UsageByZip.csv")
temp <- read_csv(filename)
if(type == "Gas") {
temp_final <-
temp %>%
filter( CUSTOMERCLASS %in%
c(
"Gas- Residential",
"Gas- Commercial") ) %>%
mutate(ZIPCODE = ZIPCODE %>% as.character()) %>%
group_by(ZIPCODE) %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326) %>%
mutate(
TOTALKBTUs =TOTALTHM*99.9761 )%>%
select(
!c(COMBINED, AVERAGETHM, TOTALTHM, TOTALCUSTOMERS)
)
}
if(type == "Electric") {
temp_final <-
temp %>%
filter( CUSTOMERCLASS %in%
c(
"Elec- Residential",
"Elec- Commercial") ) %>%
mutate(ZIPCODE = ZIPCODE %>% as.character()) %>%
group_by(ZIPCODE) %>%
right_join(
bay_zips %>% select(GEOID10),
by = c("ZIPCODE" = "GEOID10")
) %>%
st_as_sf() %>%
st_transform(4326) %>%
mutate(
TOTALKBTUs =TOTALKWH*3.412 )%>%
select(
!c(COMBINED, AVERAGEKWH, TOTALKWH, TOTALCUSTOMERS)
)
}
pge <- rbind(pge, temp_final)
}
}
}
#sum over zipcodes
pge_final <-
pge %>%
group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
summarize(
SUMKBTUs =
sum(
TOTALKBTUs,
na.rm = T
)
)
pge_final <- filter(pge_final, YEAR != 2017 | MONTH != 9)
#isolate commercial and residential
pge_comm_gas <- filter(pge_final, CUSTOMERCLASS %in% c("Gas- Commercial"))
pge_comm_elec <- filter(pge_final, CUSTOMERCLASS %in% c("Elec- Commercial"))
pge_res_gas <- filter(pge_final, CUSTOMERCLASS %in% c("Gas- Residential"))
pge_res_elec <- filter(pge_final, CUSTOMERCLASS %in% c("Elec- Residential"))
Generate Plots
#create data sets for graphing
DATE <- seq(as.Date("2017-01-01"), by="1 month", length.out=45)
DATE <- DATE[DATE != "2017-09-01"]
CLASS <- c(rep(c("GAS"), 44), rep(c("ELECTRIC"), 44))
BTUs_comm <- c(pge_comm_gas$SUMKBTUs, pge_comm_elec$SUMKBTUs)
BTUs_res <- c(pge_res_gas$SUMKBTUs, pge_res_elec$SUMKBTUs)
DATA_comm = data.frame(DATE, CLASS, BTUs_comm)
DATA_res = data.frame(DATE, CLASS, BTUs_res)
#commercial graph
pge_comm <-
DATA_comm %>%
ggplot() +
geom_bar(
aes(
x = DATE,
y = BTUs_comm,
fill = CLASS
),
stat = "identity",
position = "stack"
) +
labs(
y = "kBTUs",
title = "Bay Area Commercial Energy Usage"
)
pge_comm %>% ggplotly()
#residential graph
pge_res <-
DATA_res %>%
ggplot() +
geom_bar(
aes(
x = DATE,
y = BTUs_res,
fill = CLASS
),
stat = "identity",
position = "stack"
) +
labs(
y = "kBTUs",
title = "Bay Area Residential Energy Usage"
)
pge_res %>% ggplotly()
## Using March 2020 as the onset of the pandemic (the first bay area counties shut down mid march), for the commercial data We can see the there is a significant decrease in power consumption in the commercial sector following the March shutdowns, which turns upward over time, as expected. Residential consumption is increased after march compared to previous years, which is again, expected.
Creating Map
#filter to residential electric in April 2019 and 2020 to only have the same set of zipcodes
april_2020 <-
pge %>%
filter(CUSTOMERCLASS == "Elec- Residential", YEAR == "2020", MONTH == "4") %>%
group_by(ZIPCODE)
april_2019 <-
pge %>%
filter(CUSTOMERCLASS == "Elec- Residential", YEAR == "2019", MONTH == "4", ZIPCODE %in% c(april_2020$ZIPCODE)) %>%
group_by(ZIPCODE)
april_2020 <-
april_2020 %>%
filter(ZIPCODE %in% c(april_2019$ZIPCODE)) %>%
group_by(ZIPCODE)
#calculate the percent change
CHANGE <- 100*(april_2020$TOTALKBTUs - april_2019$TOTALKBTUs)/april_2019$TOTALKBTUs
DATA_FINAL <- cbind(april_2019, CHANGE)
DATA_FINAL <- na.omit(DATA_FINAL)
DATA_FINAL <- filter(DATA_FINAL, CHANGE > 20 | CHANGE < -20)
#Making the plot
res_pal <- colorNumeric(
palette = "Blues",
domain =
DATA_FINAL$CHANGE
)
leaflet() %>%
addTiles() %>%
addPolygons(
data = DATA_FINAL,
fillColor = ~res_pal(CHANGE),
color = "white",
opacity = 0.5,
fillOpacity = 0.5,
weight = 1,
label = ~paste0(
round(CHANGE),
" Percent Change Between 2019 to 2020 ",
ZIPCODE
),
highlightOptions = highlightOptions(
weight = 2,
opacity = 1
)
) %>%
addLegend(
data = DATA_FINAL,
pal = res_pal,
values = ~CHANGE,
title = "Percence Change from April 2019 to April 2020 (%)"
)
The zipcodes withthe greatest percent increase in electricity usage are highlighted on the map above. The cities were identified by comparing April of 2019 residential electricity usage to April of 2020. The values were taken as a percent increase and thresholded to be greater than a 20% increase or less than -20% decrease by looking at the spread of the data. It seems that the largest increases were concentrated in the San Fransisco area. By this metric my own zipcode was actually one of those with the greatest increase. Obviously this method is very sipmle and has a lot of flaws. It is really only accounting for a very small period of time (the month of April). It also assumes that all other variables were constant between these two Aprils in subsequent years (e.g. outdoor temperature etc.). There are a lot more rigorous ways to look at this data. I could have plotted the data over time or done more complex averaging or statistical analysis.